Linear Regression

This notebook implements the same batch gradient descent algorithm for linear regression as before, this time in Julia.


In [1]:
using DataFrames

In [2]:
# Load the house-price training data (columns: sqft, rooms, price — see Out[2])
# and show the first rows.
# NOTE(review): `readtable` and `head` are the pre-1.0 DataFrames API; current
# versions use `CSV.read("house_prices.csv", DataFrame)` and `first(houses, 6)`
# instead — confirm which DataFrames version this notebook targets.
houses = readtable("house_prices.csv")
head(houses)


Out[2]:
sqftroomsprice
12104.03.0399900.0
21600.03.0329900.0
32400.03.0369000.0
41416.02.0232000.0
53000.04.0539900.0
61985.04.0299900.0

In [3]:
standardize(v) = (v - mean(v)) / std(v)

# Number of training examples (idiomatic `size(df, 1)` instead of
# allocating the full size tuple with `size(houses)[1]`).
m = size(houses, 1)

# Standardize the target and the features, and prepend a column of ones
# so the first parameter acts as the intercept term.
# NOTE(review): `houses[:col]` is the old DataFrames column syntax; newer
# versions use `houses[!, :col]` or `houses.col` — confirm the version in use.
y = standardize(houses[:price])
X = [ones(m) standardize(houses[:sqft]) standardize(houses[:rooms])];

In [4]:
# Hypothesis: the linear model's predictions, Xθ.
h(θ, X) = X * θ

# Least-squares objective: J(θ) = ½‖Xθ − y‖².
# Bug fix: the original lacked the square (`0.5 * norm(...)`), which is NOT
# the function whose gradient X'(Xθ − y) is used in `descend` below; squaring
# makes the cost consistent with the update rule.
J(θ, X, y) = 0.5 * norm(h(θ, X) - y)^2

# One batch gradient-descent step with learning rate α.
# Returns the updated parameters and the residual norm ‖Xθ − y‖, which
# `fit` uses as its convergence measure.
function descend(θ, X, y, α = 0.01)
    r = h(θ, X) - y          # residual, computed once (original computed it twice)
    θ - α * (X' * r), norm(r)
end


Out[4]:
descend (generic function with 2 methods)

In [5]:
# Fit linear-regression parameters θ to design matrix X and target y by
# batch gradient descent (up to 1000 epochs), stopping early when the
# per-step improvement in the residual norm drops below 1e-8.
# Prints progress every 10 epochs and returns the fitted θ.
function fit(X, y)
    θ = zeros(size(X, 2))   # idiomatic size(X, 2) vs. size(X)[2]

    θ, cost = descend(θ, X, y)
    for i in 1:1000
        previous = cost
        θ, cost = descend(θ, X, y)
        # Converged: the step barely improved the residual.
        if previous - cost < 1e-8
            break
        end

        if i % 10 == 0
            println("epoch: $i, cost: $cost")
        end
    end

    # Bug fix: the original printed only the bare label "theta: " and
    # returned `nothing` (the value of println); Out[6] shows the theta
    # values were meant to be printed. Also return θ so callers get the fit.
    println("theta: $θ")
    return θ
end


Out[5]:
fit (generic function with 1 method)

In [6]:
fit(X, y)


epoch: 10, cost: 3.5186869504409986
epoch: 20, cost: 3.50507886569909
epoch: 30, cost: 3.5049308911511154
epoch: 40, cost: 3.5049292851904403
theta: [-9.770016826810002e-17,0.8847308497460539,-0.0531436815543813]